/* Session setup: close any log that is still open, empty memory, and
   switch off -more- paging. */
capture log close
clear

//set mem 2g

set more off

/* Raw variables requested from every test file read below. */

local vars STATION_ID VIN START_DATE LIC_PLATE ODOMETER TRANS_TYPE ///
    FUEL_TYPE INSP_REASN OVERALL OVRAL_EMIS OVRAL_VIS OVRAL_FUNC ///
    REPAIRS PREREPAIRS HC_MODE1 CO_MODE1 NO_MODE1 HC_MODE2 CO_MODE2 ///
    NO_MODE2 TEST_TYPE VEH_TST_WT VEH_TYP_CD

cd "/home/work/projects/smog_check/Smog Check Data/"

/* Temporary files: one named after each calendar year 1996-2012 (referenced
   later as ``year''), plus baaqmd. */
tempfile baaqmd
forvalues yr = 1996/2012 {
    tempfile `yr'
}

log using append_gasprice.log, replace

/*	append_gasprice.do: Pulls out and appends the smog check records for all cars checked 
	between 1996 and 2012 */
	



/* 1996-2001: the data arrive as one zip archive per month
   (`year'/`year'`month'.zip), each yielding test`year'`month'.dta.
   Every month is cleaned and stacked into that year's temporary file. */
forvalues year = 1996/2001 {
    foreach month in 01 02 03 04 05 06 07 08 09 10 11 12 {
        unzipfile `year'/`year'`month'.zip, replace

        /* -use- expects the if qualifier BEFORE -using- and takes -clear-
           (not -replace-) to discard the data currently in memory. */
        use `vars' if VIN != "" using test`year'`month', clear

        /* Rename variables so they will match later years (and be less unwieldy) */
        rename *, lower
        *tostring drv_config, replace
        rename start_date teststart

        /* When station_id was read as a string: 8-character values lose
           their first two characters, values that do not parse as a number
           are blanked, and the variable is then converted to numeric. */
        capture confirm string variable station_id
        if _rc == 0 {
            qui replace station_id = substr(station_id, 3, .) if length(station_id) == 8
            qui replace station_id = "" if real(station_id) == .
            destring station_id, replace
        }

        /* Force both repair fields to string so the type is the same in
           every file that gets appended. */
        qui tostring prerepairs repairs, replace

        /* January starts the year's file; later months are stacked onto it. */
        if "`month'" != "01" append using ``year''
        save ``year'', replace

        /* Remove the extracted monthly file (erase is the portable synonym of rm). */
        erase test`year'`month'.dta
    }
}




/* 2002-2010: these years have a different data structure. One zip archive per
   year is extracted, then each monthly file test`year'`month'.dta is cleaned
   and stacked into that year's temporary file. */

forvalues year = 2002/2010 {
    /* replace: overwrite extracted files left behind by an interrupted
       earlier run instead of stopping with "file already exists". */
    unzipfile `year'.zip, replace

    foreach month in 01 02 03 04 05 06 07 08 09 10 11 12 {
        /* August 2005 is read without START_TIME, and no repair file is
           removed for it; every other month is handled with both.
           NOTE(review): presumably that month's file lacks START_TIME and has
           no repair file - confirm against the raw data. */
        local has_time = !(`year' == 2005 & "`month'" == "08")
        if `has_time' local time START_TIME
        else local time

        /* clear makes the load independent of whatever is in memory. */
        use `vars' `time' if VIN != "" using test`year'`month'.dta, clear

        /* Rename variables to match better and be less unwieldy */
        rename *, lower
        rename start_date teststart

        /* When station_id was read as a string: 8-character values lose
           their first two characters, values that do not parse as a number
           are blanked, and the variable is then converted to numeric. */
        capture confirm string variable station_id
        if _rc == 0 {
            qui replace station_id = substr(station_id, 3, .) if length(station_id) == 8
            qui replace station_id = "" if real(station_id) == .
            destring station_id, replace
        }

        qui tostring prerepairs repairs, replace

        /* Fix dates: join the numeric date with the time string, split the
           result into month/day/year/hour/minute with the regex, and store
           the -clock()- value. Rows that do not match the pattern keep their
           original teststart value. */
        if `has_time' {
            qui replace teststart = clock(regexs(1)+"/" + regexs(2) + "/" + regexs(3) + "/" + regexs(4) + "/" + regexs(5),"MDYhm") if regexm(string(teststart,"%20.0g")+start_time,"([0-9]|[0-9][0-9])([0-9][0-9])([0-9][0-9][0-9][0-9])([0-9][0-9]):([0-9][0-9]$)")
            drop start_time
        }

        /* January starts the year's file; later months are stacked onto it. */
        if "`month'" != "01" append using ``year''
        save ``year'', replace

        /* Remove the extracted files (erase is the portable synonym of rm). */
        erase test`year'`month'.dta
        if `has_time' erase repair`year'`month'.dta
    }
}

/* 2011-2012: only gzipped full-year files (from Jeffrey) are available so far.
   Each one is unpacked, cleaned the same way as the monthly files, saved to
   the year's temporary file, and compressed again. */

foreach year of numlist 2011/2012 {
	sh gunzip test`year'.dta.gz

	use `vars' START_TIME if VIN != "" using test`year'.dta

	/* Lower-case every variable name and shorten the date variable's name */
	rename *, lower
	rename start_date teststart

	/* Keep the repair fields as strings */
	quietly tostring prerepairs repairs, replace

	/* A string station_id is trimmed (8-character values lose their first
	   two characters), blanked where it is not a number, and made numeric */
	capture confirm string variable station_id
	if !_rc {
		quietly replace station_id = substr(station_id, 3,.) if length(station_id) == 8
		quietly replace station_id = "" if missing(real(station_id))
		destring station_id, replace
	}

	/* Fix dates: the numeric date joined with the time string is split by
	   the regex into month/day/year/hour/minute and converted with clock();
	   rows that do not match keep their original value */
	quietly replace teststart = ///
		clock(regexs(1)+"/" + regexs(2) + "/" + regexs(3) + "/" + regexs(4) + "/" + regexs(5),"MDYhm") ///
		if regexm(string(teststart,"%20.0g")+start_time,"([0-9]|[0-9][0-9])([0-9][0-9])([0-9][0-9][0-9][0-9])([0-9][0-9]):([0-9][0-9]$)")
	drop start_time

	save ``year'', replace
	sh gzip test`year'.dta
}
		

drop _all

/* Append the yearly temporary files (1996-2012) into a single dataset */

/* -use- accepts -clear-, not -replace-, to discard the data in memory. */
use `1996', clear

forvalues year = 1997/2012 {
	append using ``year''
}

/* Keep one copy of any rows that are identical on every variable
   (this also leaves the data sorted by all variables). */
bysort *: keep if _n == 1


save smog.dta, replace


